Libraries
import os
import pandas as pd
import numpy as np
import scipy.stats as st
import seaborn as sns
import matplotlib.pyplot as plt
import plotly.express as px
Reading the Data
# Load the 2019 RIPA (Racial and Identity Profiling Act) stop-data extract.
# NOTE(review): despite the filename, the file also contains rows from other
# agencies (e.g. SAN DIEGO PD in the preview) — filtered later.
list_path = "Data/Oakland_PD_2019.csv"
ripa_df = pd.read_csv(list_path)
# Preview the first five records to confirm the file loaded as expected.
ripa_df.head(5)
| DOJ_RECORD_ID | PERSON_NUMBER | AGENCY_ORI | AGENCY_NAME | TIME_OF_STOP | DATE_OF_STOP | STOP_DURATION | CLOSEST_CITY | SCHOOL_CODE | SCHOOL_NAME | ... | ROS_NONCRIMINAL_TRANSPORT | ROS_CONTACT_LEGAL_GUARDIAN | ROS_PSYCH_HOLD | ROS_US_HOMELAND | ROS_REFERRAL_SCHOOL_ADMIN | ROS_REFERRAL_SCHOOL_COUNSELOR | ROS_WARNING_CDS | ROS_CITATION_CDS | ROS_IN_FIELD_CITE_RELEASE_CDS | ROS_CUSTODIAL_WOUT_WARRANT_CDS | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | S01092009115Y0G48V7P | 1 | CA0010900 | OAKLAND PD | 1500 | 26-JAN-19 | 10 | OAKLAND | NaN | NaN | ... | 0 | 0 | 0 | 0 | 0 | 0 | NaN | 54011 | NaN | NaN |
| 1 | W7110192824A1JIBP56D | 1 | CA0371100 | SAN DIEGO PD | 1541 | 25-JUN-19 | 5 | LA MESA | NaN | NaN | ... | 0 | 0 | 0 | 0 | 0 | 0 | NaN | 66205 | NaN | NaN |
| 2 | S010920091KTGP7LC9K9 | 1 | CA0010900 | OAKLAND PD | 1616 | 19-JUN-19 | 40 | OAKLAND | NaN | NaN | ... | 0 | 0 | 0 | 0 | 0 | 0 | 54566 | NaN | NaN | 24054, 28035, 50037 |
| 3 | S010920091AX0B2XG8RH | 1 | CA0010900 | OAKLAND PD | 1824 | 08-JAN-19 | 15 | OAKLAND | NaN | NaN | ... | 0 | 0 | 0 | 0 | 0 | 0 | NaN | NaN | NaN | NaN |
| 4 | S0109200910BGJ8W1GDW | 1 | CA0010900 | OAKLAND PD | 2320 | 02-MAY-19 | 30 | OAKLAND | NaN | NaN | ... | 0 | 0 | 0 | 0 | 0 | 0 | NaN | NaN | NaN | 22004, 22012 |
5 rows × 118 columns
RAE_FULL:
G_FULL:
AGE: Perceived age of person stopped.
REASON_FOR_STOP:
ADS_HANDCUFFED:
TPS_DRUGS:
AGENCY_NAME:
TPS_ALCOHOL:
TIME_OF_STOP:
DATE_OF_STOP:
STOP_DURATION:
PERSON_NUMBER:
CLOSEST_CITY:
# Keep only the columns used in this analysis (demographics, stop reason,
# handcuffing, drug/alcohol findings, and time/place metadata).
oakland_pd = ripa_df[['RAE_FULL', 'G_FULL', 'AGE', 'REASON_FOR_STOP', 'ADS_HANDCUFFED', 'TPS_DRUGS', 'TPS_ALCOHOL', 'PERSON_NUMBER', 'TIME_OF_STOP', 'DATE_OF_STOP', 'STOP_DURATION', 'CLOSEST_CITY', 'AGENCY_NAME']]
oakland_pd.head(5)
| RAE_FULL | G_FULL | AGE | REASON_FOR_STOP | ADS_HANDCUFFED | TPS_DRUGS | TPS_ALCOHOL | PERSON_NUMBER | TIME_OF_STOP | DATE_OF_STOP | STOP_DURATION | CLOSEST_CITY | AGENCY_NAME | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 7 | 1 | 150 | 1 | 0 | NaN | NaN | 1 | 1500 | 26-JAN-19 | 10 | OAKLAND | OAKLAND PD |
| 1 | 2 | 1 | 35 | 2 | 0 | NaN | NaN | 1 | 1541 | 25-JUN-19 | 5 | LA MESA | SAN DIEGO PD |
| 2 | 3 | 2 | 25 | 1 | 1 | 0.0 | 0.0 | 1 | 1616 | 19-JUN-19 | 40 | OAKLAND | OAKLAND PD |
| 3 | 2 | 1 | 20 | 2 | 0 | NaN | NaN | 1 | 1824 | 08-JAN-19 | 15 | OAKLAND | OAKLAND PD |
| 4 | 7 | 1 | 40 | 2 | 1 | NaN | NaN | 1 | 2320 | 02-MAY-19 | 30 | OAKLAND | OAKLAND PD |
# Restrict the data to stops made by Oakland PD itself; the raw extract
# includes rows from other agencies.
oakland_pd = oakland_pd.query("AGENCY_NAME == 'OAKLAND PD'")
oakland_pd.head()
| RAE_FULL | G_FULL | AGE | REASON_FOR_STOP | ADS_HANDCUFFED | TPS_DRUGS | TPS_ALCOHOL | PERSON_NUMBER | TIME_OF_STOP | DATE_OF_STOP | STOP_DURATION | CLOSEST_CITY | AGENCY_NAME | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 7 | 1 | 150 | 1 | 0 | NaN | NaN | 1 | 1500 | 26-JAN-19 | 10 | OAKLAND | OAKLAND PD |
| 2 | 3 | 2 | 25 | 1 | 1 | 0.0 | 0.0 | 1 | 1616 | 19-JUN-19 | 40 | OAKLAND | OAKLAND PD |
| 3 | 2 | 1 | 20 | 2 | 0 | NaN | NaN | 1 | 1824 | 08-JAN-19 | 15 | OAKLAND | OAKLAND PD |
| 4 | 7 | 1 | 40 | 2 | 1 | NaN | NaN | 1 | 2320 | 02-MAY-19 | 30 | OAKLAND | OAKLAND PD |
| 5 | 3 | 1 | 25 | 2 | 1 | NaN | NaN | 1 | 1130 | 15-MAY-19 | 30 | OAKLAND | OAKLAND PD |
# Dtypes and null counts: only TPS_DRUGS / TPS_ALCOHOL contain missing values.
oakland_pd.info()
<class 'pandas.core.frame.DataFrame'> Index: 24395 entries, 0 to 25227 Data columns (total 13 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 RAE_FULL 24395 non-null int64 1 G_FULL 24395 non-null int64 2 AGE 24395 non-null int64 3 REASON_FOR_STOP 24395 non-null int64 4 ADS_HANDCUFFED 24395 non-null int64 5 TPS_DRUGS 1908 non-null float64 6 TPS_ALCOHOL 1908 non-null float64 7 PERSON_NUMBER 24395 non-null int64 8 TIME_OF_STOP 24395 non-null int64 9 DATE_OF_STOP 24395 non-null object 10 STOP_DURATION 24395 non-null int64 11 CLOSEST_CITY 24395 non-null object 12 AGENCY_NAME 24395 non-null object dtypes: float64(2), int64(8), object(3) memory usage: 2.6+ MB
# Numeric summary — note the sentinel-looking maxima (AGE 150, REASON 999).
oakland_pd.describe()
| RAE_FULL | G_FULL | AGE | REASON_FOR_STOP | ADS_HANDCUFFED | TPS_DRUGS | TPS_ALCOHOL | PERSON_NUMBER | TIME_OF_STOP | STOP_DURATION | |
|---|---|---|---|---|---|---|---|---|---|---|
| count | 24395.000000 | 24395.000000 | 24395.000000 | 24395.000000 | 24395.000000 | 1908.000000 | 1908.000000 | 24395.000000 | 24395.000000 | 24395.000000 |
| mean | 3.018528 | 1.347325 | 33.702890 | 2.036278 | 0.394097 | 0.312369 | 0.009434 | 1.214224 | 1352.095429 | 31.992949 |
| std | 1.801742 | 2.303632 | 14.517176 | 15.817108 | 0.488666 | 0.463581 | 0.096695 | 0.669571 | 624.952854 | 42.382828 |
| min | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 0.000000 | 0.000000 | 0.000000 | 1.000000 | 0.000000 | 1.000000 |
| 25% | 2.000000 | 1.000000 | 25.000000 | 1.000000 | 0.000000 | 0.000000 | 0.000000 | 1.000000 | 932.000000 | 10.000000 |
| 50% | 2.000000 | 1.000000 | 30.000000 | 2.000000 | 0.000000 | 0.000000 | 0.000000 | 1.000000 | 1437.000000 | 20.000000 |
| 75% | 3.000000 | 2.000000 | 40.000000 | 2.000000 | 1.000000 | 1.000000 | 0.000000 | 1.000000 | 1831.000000 | 40.000000 |
| max | 8.000000 | 99.000000 | 150.000000 | 999.000000 | 1.000000 | 1.000000 | 1.000000 | 10.000000 | 2359.000000 | 999.000000 |
Mapping the Values into dictionaries
# Code-to-label lookup tables, taken from the RIPA data documentation.
# Codes not listed here (e.g. 99 / 999 "not reported") are left untouched
# by .replace() and filtered out explicitly later.
gender_map = {
    1: "Male",
    2: "Female",
    3: "T Male",
    4: "T Female",
    5: "Nonconforming",
}
race_map = {
    1: "Asian",
    2: "Black",
    3: "Hispanic/Latino",
    4: "Middle Eastern or South Asian",
    5: "Native American",
    6: "Pacific Islander",
    7: "White",
    8: "Multiracial",
}
reason_map = {
    1: "Traffic Violation",
    2: "Reasonable Suspicion",
    3: "Parole/Probation/PRCS/mandatory supervision",
    4: "Knowledge of outstanding arrest warrant/wanted person",
    5: "Investigation to determine weather a person was truant",
    6: "Consensual encounter resulting in search",
    7: "Possible conduct under Education Code",
    8: "Determine whether student violated school policy",
}
# The three binary indicators share the same 0/1 -> No/Yes encoding.
handcuffed_map = {0: "No", 1: "Yes"}
drugs_map = {0: "No", 1: "Yes"}
alc_map = {0: "No", 1: "Yes"}
# Stop counts per race label, sorted most to least common.
race_counts = oakland_pd["RAE_FULL"].replace(race_map).value_counts()
race_counts
RAE_FULL Black 12856 Hispanic/Latino 6031 White 3268 Asian 1173 Middle Eastern or South Asian 578 Multiracial 283 Pacific Islander 177 Native American 29 Name: count, dtype: int64
# Stop counts per gender label; the unmapped code 99 ("not reported")
# survives as an integer index entry.
gender_counts = oakland_pd["G_FULL"].replace(gender_map).value_counts()
gender_counts
G_FULL Male 17282 Female 7054 Nonconforming 20 99 13 T Male 13 T Female 13 Name: count, dtype: int64
# Age summary — max of 150 is clearly not a real age (investigated below).
oakland_pd["AGE"].describe()
count 24395.000000 mean 33.702890 std 14.517176 min 1.000000 25% 25.000000 50% 30.000000 75% 40.000000 max 150.000000 Name: AGE, dtype: float64
# How many rows carry an implausible (>= 100) age value.
len(oakland_pd[oakland_pd["AGE"] >= 100])
106
Quick Stats
# Relative shares among the three most-stopped race groups.
top_3_races = race_counts.iloc[:3]
total_top_race = top_3_races.sum()
race_ratios = top_3_races.div(total_top_race)
race_ratios
RAE_FULL Black 0.580275 Hispanic/Latino 0.272218 White 0.147506 Name: count, dtype: float64
# Relative shares among the three most common gender categories.
top_3_genders = gender_counts.iloc[:3]
total_top_genders = top_3_genders.sum()
gender_ratios = top_3_genders.div(total_top_genders)
gender_ratios
G_FULL Male 0.709558 Female 0.289621 Nonconforming 0.000821 Name: count, dtype: float64
# Most frequent perceived age among stopped individuals.
oakland_pd['AGE'].mode().iloc[0]
30
# Fraction (0-1) of stops where handcuffs were used. NOTE(review): despite
# the name, this is not multiplied by 100 — it is a proportion (~0.394).
percentage_handcuffs = (oakland_pd["ADS_HANDCUFFED"].sum() / oakland_pd.shape[0])
percentage_handcuffs
0.39409715105554416
# Fraction (0-1) of *all* stops where drugs were found. NOTE(review):
# TPS_DRUGS is NaN for most rows (no search performed); .sum() skips NaN
# while the denominator counts every stop — i.e. missing is treated as "no".
percentage_drugs = (oakland_pd["TPS_DRUGS"].sum() / oakland_pd.shape[0])
percentage_drugs
0.024431235908997744
Irregularities
# Re-inspect before handling the missing values.
oakland_pd.head()
| RAE_FULL | G_FULL | AGE | REASON_FOR_STOP | ADS_HANDCUFFED | TPS_DRUGS | TPS_ALCOHOL | PERSON_NUMBER | TIME_OF_STOP | DATE_OF_STOP | STOP_DURATION | CLOSEST_CITY | AGENCY_NAME | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 7 | 1 | 150 | 1 | 0 | NaN | NaN | 1 | 1500 | 26-JAN-19 | 10 | OAKLAND | OAKLAND PD |
| 2 | 3 | 2 | 25 | 1 | 1 | 0.0 | 0.0 | 1 | 1616 | 19-JUN-19 | 40 | OAKLAND | OAKLAND PD |
| 3 | 2 | 1 | 20 | 2 | 0 | NaN | NaN | 1 | 1824 | 08-JAN-19 | 15 | OAKLAND | OAKLAND PD |
| 4 | 7 | 1 | 40 | 2 | 1 | NaN | NaN | 1 | 2320 | 02-MAY-19 | 30 | OAKLAND | OAKLAND PD |
| 5 | 3 | 1 | 25 | 2 | 1 | NaN | NaN | 1 | 1130 | 15-MAY-19 | 30 | OAKLAND | OAKLAND PD |
Assuming Missing Values as Zeroes
# Only TPS_DRUGS and TPS_ALCOHOL contain NaN (see .info() output); treat a
# missing search result as "nothing found" (0).
oakland_pd = oakland_pd.fillna(0)
oakland_pd.head()
| RAE_FULL | G_FULL | AGE | REASON_FOR_STOP | ADS_HANDCUFFED | TPS_DRUGS | TPS_ALCOHOL | PERSON_NUMBER | TIME_OF_STOP | DATE_OF_STOP | STOP_DURATION | CLOSEST_CITY | AGENCY_NAME | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 7 | 1 | 150 | 1 | 0 | 0.0 | 0.0 | 1 | 1500 | 26-JAN-19 | 10 | OAKLAND | OAKLAND PD |
| 2 | 3 | 2 | 25 | 1 | 1 | 0.0 | 0.0 | 1 | 1616 | 19-JUN-19 | 40 | OAKLAND | OAKLAND PD |
| 3 | 2 | 1 | 20 | 2 | 0 | 0.0 | 0.0 | 1 | 1824 | 08-JAN-19 | 15 | OAKLAND | OAKLAND PD |
| 4 | 7 | 1 | 40 | 2 | 1 | 0.0 | 0.0 | 1 | 2320 | 02-MAY-19 | 30 | OAKLAND | OAKLAND PD |
| 5 | 3 | 1 | 25 | 2 | 1 | 0.0 | 0.0 | 1 | 1130 | 15-MAY-19 | 30 | OAKLAND | OAKLAND PD |
Renaming the columns for accessibility
# Shorter, friendlier column names for the rest of the analysis.
oakland_pd = oakland_pd.rename(columns={
    "RAE_FULL": "Race",
    "G_FULL": "Gender",
    "AGE": "Age",
    "REASON_FOR_STOP": "Reason",
    "ADS_HANDCUFFED": "Handcuffed",
    "TPS_DRUGS": "Drugs",
    "TPS_ALCOHOL": "Alcohol",
    "PERSON_NUMBER": "Persons",
    "TIME_OF_STOP": "Hour",
    "DATE_OF_STOP": "Day",
    "STOP_DURATION": "Duration",
    "CLOSEST_CITY": "City",
    "AGENCY_NAME": "Agency",
})
oakland_pd.head()
| Race | Gender | Age | Reason | Handcuffed | Drugs | Alcohol | Persons | Hour | Day | Duration | City | Agency | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 7 | 1 | 150 | 1 | 0 | 0.0 | 0.0 | 1 | 1500 | 26-JAN-19 | 10 | OAKLAND | OAKLAND PD |
| 2 | 3 | 2 | 25 | 1 | 1 | 0.0 | 0.0 | 1 | 1616 | 19-JUN-19 | 40 | OAKLAND | OAKLAND PD |
| 3 | 2 | 1 | 20 | 2 | 0 | 0.0 | 0.0 | 1 | 1824 | 08-JAN-19 | 15 | OAKLAND | OAKLAND PD |
| 4 | 7 | 1 | 40 | 2 | 1 | 0.0 | 0.0 | 1 | 2320 | 02-MAY-19 | 30 | OAKLAND | OAKLAND PD |
| 5 | 3 | 1 | 25 | 2 | 1 | 0.0 | 0.0 | 1 | 1130 | 15-MAY-19 | 30 | OAKLAND | OAKLAND PD |
Replacing Values as Defined in the documentation
# Translate the documented numeric codes into human-readable labels.
# Codes absent from a map (e.g. 99 / 999 "not reported") pass through
# .replace() unchanged and are filtered explicitly later.
oakland_pd["Gender"] = oakland_pd["Gender"].replace(gender_map)
oakland_pd["Race"] = oakland_pd["Race"].replace(race_map)
oakland_pd["Reason"] = oakland_pd["Reason"].replace(reason_map)
oakland_pd["Handcuffed"] = oakland_pd["Handcuffed"].replace(handcuffed_map)
oakland_pd["Drugs"] = oakland_pd["Drugs"].replace(drugs_map)
oakland_pd["Alcohol"] = oakland_pd["Alcohol"].replace(alc_map)
# Dates look like "26-JAN-19"; passing the explicit format makes parsing
# consistent and fast, and silences the "Could not infer format" warning.
oakland_pd['Day'] = pd.to_datetime(oakland_pd['Day'], format="%d-%b-%y")
oakland_pd.head()
/var/folders/tj/g_7k1_0n1ql5y1bqbkbsz7x00000gn/T/ipykernel_20505/2686835809.py:6: UserWarning: Could not infer format, so each element will be parsed individually, falling back to `dateutil`. To ensure parsing is consistent and as-expected, please specify a format. oakland_pd['Day'] = pd.to_datetime(oakland_pd['Day'])
| Race | Gender | Age | Reason | Handcuffed | Drugs | Alcohol | Persons | Hour | Day | Duration | City | Agency | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | White | Male | 150 | Traffic Violation | No | No | No | 1 | 1500 | 2019-01-26 | 10 | OAKLAND | OAKLAND PD |
| 2 | Hispanic/Latino | Female | 25 | Traffic Violation | Yes | No | No | 1 | 1616 | 2019-06-19 | 40 | OAKLAND | OAKLAND PD |
| 3 | Black | Male | 20 | Reasonable Suspicion | No | No | No | 1 | 1824 | 2019-01-08 | 15 | OAKLAND | OAKLAND PD |
| 4 | White | Male | 40 | Reasonable Suspicion | Yes | No | No | 1 | 2320 | 2019-05-02 | 30 | OAKLAND | OAKLAND PD |
| 5 | Hispanic/Latino | Male | 25 | Reasonable Suspicion | Yes | No | No | 1 | 1130 | 2019-05-15 | 30 | OAKLAND | OAKLAND PD |
Distribution of Age by Race
def age_distribution_by_race(dataframe):
    """Return summary statistics (count/mean/std/quartiles) of Age per Race."""
    return dataframe.groupby("Race")["Age"].describe()
# Age summary per race; used to spot the 150-year-old outliers below.
age_distribution_by_race_df = age_distribution_by_race(oakland_pd)
age_distribution_by_race_df
| count | mean | std | min | 25% | 50% | 75% | max | |
|---|---|---|---|---|---|---|---|---|
| Race | ||||||||
| Asian | 1173.0 | 37.091219 | 14.043972 | 1.0 | 25.0 | 35.0 | 50.0 | 90.0 |
| Black | 12856.0 | 33.587663 | 12.614733 | 1.0 | 25.0 | 30.0 | 40.0 | 93.0 |
| Hispanic/Latino | 6031.0 | 29.721771 | 10.219881 | 1.0 | 20.0 | 30.0 | 35.0 | 80.0 |
| Middle Eastern or South Asian | 578.0 | 32.396194 | 10.569262 | 10.0 | 25.0 | 30.0 | 40.0 | 70.0 |
| Multiracial | 283.0 | 32.805654 | 14.890417 | 10.0 | 25.0 | 30.0 | 40.0 | 150.0 |
| Native American | 29.0 | 42.448276 | 13.149107 | 20.0 | 30.0 | 45.0 | 50.0 | 70.0 |
| Pacific Islander | 177.0 | 30.372881 | 9.808794 | 12.0 | 21.0 | 30.0 | 35.0 | 70.0 |
| White | 3268.0 | 40.698592 | 23.544773 | 1.0 | 30.0 | 35.0 | 50.0 | 150.0 |
Curious about the Max age
# Inspect the rows with the implausible Age values.
oakland_pd[(oakland_pd['Age'] > 100)].head()
| Race | Gender | Age | Reason | Handcuffed | Drugs | Alcohol | Persons | Hour | Day | Duration | City | Agency | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | White | Male | 150 | Traffic Violation | No | No | No | 1 | 1500 | 2019-01-26 | 10 | OAKLAND | OAKLAND PD |
| 165 | White | Male | 150 | Reasonable Suspicion | Yes | No | No | 1 | 704 | 2019-02-08 | 15 | OAKLAND | OAKLAND PD |
| 184 | White | Male | 150 | Reasonable Suspicion | Yes | No | No | 1 | 1021 | 2019-06-10 | 40 | OAKLAND | OAKLAND PD |
| 485 | White | Female | 150 | Traffic Violation | No | No | No | 1 | 1119 | 2019-11-20 | 15 | OAKLAND | OAKLAND PD |
| 575 | White | Male | 150 | Traffic Violation | No | No | No | 1 | 1023 | 2019-06-13 | 10 | OAKLAND | OAKLAND PD |
# The Age=150 placeholder rows are almost all labelled White.
oakland_pd[(oakland_pd['Age'] > 100)]["Race"].value_counts()
Race White 104 Multiracial 2 Name: count, dtype: int64
Age Distribution by Race, Visualized
# Violin plot of Age per Race, excluding the >=100 placeholder ages.
# Sorting by Race gives a stable alphabetical axis order.
dabr = sns.violinplot(data=oakland_pd[(oakland_pd['Age'] < 100)].sort_values("Race"),
                     y="Race", x="Age",
                     saturation=0.7, palette="Set3")
dabr.set_title("Distribution of Age by Race");
Number of Stops Distributed by Race
# Raw number of stops per race group.
sns.countplot(data=oakland_pd, y="Race", palette="pastel")
plt.title("Stop Distribution by Race")
plt.show()
Number of Stops across Races, Distributed by Age
# Age histogram (with KDE overlay) split by race; placeholder ages excluded.
sns.displot(data=oakland_pd[(oakland_pd['Age'] < 100)], x = "Age", hue = "Race", bins = 15, palette="pastel", height = 5, aspect = 2, kde = True)
plt.title("Age Distribution by Race of Stops by Oakland PD")
plt.xlabel("Age")
plt.ylabel("Number of Stops")
plt.show()
Number of Stops across Gender, Distributed by Age
# Age histogram split by gender. Gender != 99 works because the unmapped
# "not reported" code 99 is still an integer after .replace().
sns.displot(data=oakland_pd[(oakland_pd['Gender'] != 99) & (oakland_pd['Age'] < 100)], x = "Age", hue = "Gender", bins = 15, palette="pastel", height = 5, aspect = 2, kde = True)
plt.title("Age Distribution by Gender of Stops by Oakland PD")
plt.xlabel("Age")
plt.ylabel("Number of Stops")
plt.show()
Percentage of Stops by Race and Gender
def percentage_of_stops_by_race_and_gender(dataframe):
    """Percentage of all stops for every observed Race/Gender combination.

    Returns a tidy DataFrame with columns Race, Gender and
    'Percentage of Stops' (values sum to 100 across all rows).
    """
    n_stops = len(dataframe)
    shares = (dataframe.groupby(["Race", "Gender"]).size() / n_stops) * 100
    return shares.reset_index(name='Percentage of Stops')
# Pivot to a Race x Gender grid of stop percentages; combinations never
# observed become NaN and are displayed as 0.
stops_by_race_gender_percentage_df = percentage_of_stops_by_race_and_gender(oakland_pd)
table = pd.pivot_table(stops_by_race_gender_percentage_df, index='Race', columns='Gender', values='Percentage of Stops')
table.fillna(0)
| Gender | 99 | Female | Male | Nonconforming | T Female | T Male |
|---|---|---|---|---|---|---|
| Race | ||||||
| Asian | 0.000000 | 1.598688 | 3.205575 | 0.000000 | 0.004099 | 0.000000 |
| Black | 0.032794 | 16.109859 | 36.474687 | 0.028694 | 0.024595 | 0.028694 |
| Hispanic/Latino | 0.004099 | 5.386350 | 19.294937 | 0.020496 | 0.012298 | 0.004099 |
| Middle Eastern or South Asian | 0.000000 | 0.336134 | 2.029104 | 0.004099 | 0.000000 | 0.000000 |
| Multiracial | 0.000000 | 0.373027 | 0.774749 | 0.008198 | 0.004099 | 0.000000 |
| Native American | 0.000000 | 0.045091 | 0.073786 | 0.000000 | 0.000000 | 0.000000 |
| Pacific Islander | 0.000000 | 0.291043 | 0.434515 | 0.000000 | 0.000000 | 0.000000 |
| White | 0.016397 | 4.775569 | 8.555032 | 0.020496 | 0.008198 | 0.020496 |
Each cell represents percentages of Gender/Race combinations for all Stops
• As the values besides Male and Female are significantly low, I will group them on a single category
Filtering by Sex
# Collapse the low-frequency gender identities into one combined category.
# NOTE(review): the boolean mask is built from the original oakland_pd but is
# row-aligned with the replaced copy, so indexing is safe; the != 99 test
# drops only the unmapped integer "not reported" code.
by_sex = oakland_pd.replace({"Nonconforming": "Nonconforming/ T Male/ T Female",
'T Male' : "Nonconforming/ T Male/ T Female",
'T Female' :"Nonconforming/ T Male/ T Female"})[oakland_pd["Gender"] != 99]
Gender Differences in Stops, Across Races, Distributed by Age
# Density-normalised age histograms, one facet per (grouped) gender.
sns.displot(data=by_sex, x="Age", hue="Race", col="Gender", kind="hist", bins=10, kde=True, palette="pastel", stat="density"
            , height = 4, aspect = 1)
plt.xlabel("Age")
plt.show()
• Even while grouping Genders aside from Male and Female, their distributions are hard to visualize.
Reason Counts
# Stop counts per reason; 99/999 are unmapped "not reported" codes.
oakland_pd["Reason"].value_counts()
Reason Reasonable Suspicion 13578 Traffic Violation 9362 Knowledge of outstanding arrest warrant/wanted person 1002 Parole/Probation/PRCS/mandatory supervision 235 Consensual encounter resulting in search 166 Investigation to determine weather a person was truant 32 99 13 999 6 Possible conduct under Education Code 1 Name: count, dtype: int64
Counts visualized
# Bar chart of stop reasons, excluding the unmapped 99/999 codes.
sns.histplot(data=oakland_pd[(oakland_pd['Reason'] != 99) & (oakland_pd['Reason'] != 999)], y="Reason", hue="Reason", palette="pastel")
plt.title("Reason for Stop")
plt.ylabel("Reason")
plt.xlabel("Number of Stops")
plt.show()
It seems to be the case that the most common reason for a stop is Reasonable suspicion, followed by Traffic violations.
For consistency in the hue display for the coming plots
# Fixed hue order so reason colours stay consistent across all plots.
# Entries must match the labels produced by reason_map exactly; the previous
# version misspelled "Knowledge" as "Konwledge", so that category silently
# fell outside the hue ordering. ("weather" is kept as-is because it matches
# the — also misspelled — label in reason_map.)
reason_order = ["Traffic Violation", "Reasonable Suspicion",
                "Parole/Probation/PRCS/mandatory supervision",
                "Knowledge of outstanding arrest warrant/wanted person",
                "Investigation to determine weather a person was truant",
                "Consensual encounter resulting in search",
                "Possible conduct under Education Code",
                "Determine whether student violated school policy"]
Reason of Stops Percentaged by Race
def percentage_of_stops_by_reason_and_race(dataframe):
    """Column-normalised percentages: for each Reason, how the stops split
    across races (each Reason column sums to 100).

    Rows with the unmapped 99/999 "not reported" codes are excluded.
    """
    valid = dataframe[~dataframe["Reason"].isin([99, 999])]
    counts = valid.groupby(["Race", "Reason"]).size().unstack().fillna(0)
    pct = counts.div(counts.sum(axis=0), axis=1) * 100
    return pct.reset_index().rename_axis(None, axis=1)
# Per-reason racial breakdown (columns sum to 100%).
stops_by_reason_race_percentage_df = percentage_of_stops_by_reason_and_race(oakland_pd)
stops_by_reason_race_percentage_df
| Race | Consensual encounter resulting in search | Investigation to determine weather a person was truant | Knowledge of outstanding arrest warrant/wanted person | Parole/Probation/PRCS/mandatory supervision | Possible conduct under Education Code | Reasonable Suspicion | Traffic Violation | |
|---|---|---|---|---|---|---|---|---|
| 0 | Asian | 3.012048 | 6.250 | 3.792415 | 5.957447 | 0.0 | 3.468847 | 6.857509 |
| 1 | Black | 52.409639 | 50.000 | 65.768463 | 65.531915 | 0.0 | 55.965532 | 46.250801 |
| 2 | Hispanic/Latino | 23.493976 | 34.375 | 19.760479 | 19.574468 | 100.0 | 23.022536 | 27.846614 |
| 3 | Middle Eastern or South Asian | 1.204819 | 0.000 | 1.097804 | 1.276596 | 0.0 | 1.701281 | 3.524888 |
| 4 | Multiracial | 0.602410 | 0.000 | 1.297405 | 2.127660 | 0.0 | 0.780675 | 1.687674 |
| 5 | Native American | 0.000000 | 0.000 | 0.499002 | 0.000000 | 0.0 | 0.147297 | 0.042726 |
| 6 | Pacific Islander | 1.204819 | 0.000 | 1.197605 | 0.000000 | 0.0 | 0.795404 | 0.587481 |
| 7 | White | 18.072289 | 9.375 | 6.586826 | 5.531915 | 0.0 | 14.118427 | 13.202307 |
Each cell represents the percentages of a Race/Reason for stop combination for all Stops
The displayed amount represents the general proportion of a Race by a given Reason of Stop
Reason of Stop Percentaged within Race
def percentage_of_reason_by_race(dataframe):
    """Row-normalised percentages: within each Race, how its stops split
    across reasons (each Race row sums to 100).

    Rows with the unmapped 99/999 "not reported" codes are excluded.
    """
    valid = dataframe[~dataframe["Reason"].isin([99, 999])]
    counts = valid.groupby(["Race", "Reason"]).size().unstack().fillna(0)
    pct = counts.div(counts.sum(axis=1), axis=0) * 100
    return pct.reset_index().rename_axis(None, axis=1)
# Within-race reason breakdown (rows sum to 100%).
reason_by_race_percentage_df = percentage_of_reason_by_race(oakland_pd)
reason_by_race_percentage_df
| Race | Consensual encounter resulting in search | Investigation to determine weather a person was truant | Knowledge of outstanding arrest warrant/wanted person | Parole/Probation/PRCS/mandatory supervision | Possible conduct under Education Code | Reasonable Suspicion | Traffic Violation | |
|---|---|---|---|---|---|---|---|---|
| 0 | Asian | 0.426621 | 0.170648 | 3.242321 | 1.194539 | 0.000000 | 40.187713 | 54.778157 |
| 1 | Black | 0.677306 | 0.124562 | 5.130401 | 1.198910 | 0.000000 | 59.159206 | 33.709615 |
| 2 | Hispanic/Latino | 0.646981 | 0.182482 | 3.284672 | 0.763106 | 0.016589 | 51.857996 | 43.248175 |
| 3 | Middle Eastern or South Asian | 0.346620 | 0.000000 | 1.906412 | 0.519931 | 0.000000 | 40.034662 | 57.192374 |
| 4 | Multiracial | 0.353357 | 0.000000 | 4.593640 | 1.766784 | 0.000000 | 37.455830 | 55.830389 |
| 5 | Native American | 0.000000 | 0.000000 | 17.241379 | 0.000000 | 0.000000 | 68.965517 | 13.793103 |
| 6 | Pacific Islander | 1.129944 | 0.000000 | 6.779661 | 0.000000 | 0.000000 | 61.016949 | 31.073446 |
| 7 | White | 0.918836 | 0.091884 | 2.021440 | 0.398162 | 0.000000 | 58.713629 | 37.856049 |
Each cell represents the percentages of a Race/Reason for stop combination for all Stops of the same Race
The displayed amount represents the relative proportion within each Race, by a given Reason of Stop
Asian:
For Race, By Sex
def plot_distribution_by_race(dataframe, race):
    """Plot the age distribution for one race, faceted by Gender and
    coloured by Reason for stop.

    Ages above 100 are excluded (known data-entry placeholders).
    """
    race_data = dataframe[(dataframe["Race"] == race) & (dataframe['Age'] <= 100)]
    try:
        g = sns.displot(data=race_data, x="Age", hue="Reason", hue_order=reason_order, col="Gender",
                        col_order=["Female", "Male"], kind="hist", bins=10, kde=True,
                        palette="pastel", height=5, aspect=1)
    except ValueError:
        # gaussian_kde requires at least two observations per group; for very
        # small subsets (e.g. Native American drug stops) fall back to a
        # plain histogram instead of crashing.
        g = sns.displot(data=race_data, x="Age", hue="Reason", hue_order=reason_order, col="Gender",
                        col_order=["Female", "Male"], kind="hist", bins=10, kde=False,
                        palette="pastel", height=5, aspect=1)
    g.fig.suptitle(f"Age Distribution for race: {race}", y=1.05)
    plt.xlabel("Age")
    plt.show()
# One faceted plot per race present in the combined-gender subset.
unique_races = by_sex["Race"].unique()
for race in unique_races:
    plot_distribution_by_race(by_sex, race)
Counts
# About 39% of stops involved handcuffing.
oakland_pd["Handcuffed"].value_counts()
Handcuffed No 14781 Yes 9614 Name: count, dtype: int64
Defining the instances where a Handcuffing Occurred
# Subset of stops where handcuffs were used.
handcuffed = oakland_pd[oakland_pd["Handcuffed"] == "Yes"]
Handcuffing Percentages by Race
def race_percentages_in_handcuff_stops(dataframe):
    """Racial breakdown (percent) of the stops where handcuffs were used.

    Returns a DataFrame with columns Race and 'Percentage in Handcuff
    Stops', sorted from most to least handcuffed group.
    """
    cuffed = dataframe[dataframe["Handcuffed"] == "Yes"]
    shares = (cuffed["Race"].value_counts() / len(cuffed)) * 100
    result = shares.reset_index()
    result.columns = ["Race", "Percentage in Handcuff Stops"]
    return result
# Share of each race among all handcuff stops.
race_percentages_in_handcuff_stops_df = race_percentages_in_handcuff_stops(oakland_pd)
race_percentages_in_handcuff_stops_df
| Race | Percentage in Handcuff Stops | |
|---|---|---|
| 0 | Black | 58.612440 |
| 1 | Hispanic/Latino | 24.869981 |
| 2 | White | 10.328687 |
| 3 | Asian | 3.151654 |
| 4 | Middle Eastern or South Asian | 1.341793 |
| 5 | Pacific Islander | 0.821718 |
| 6 | Multiracial | 0.748908 |
| 7 | Native American | 0.124818 |
• It seems that the handcuffings of Black individuals make up almost 60% of all handcuffings.
Handcuff Trends by Reason of Stop
# Reasons for stops that ended in handcuffing (99/999 codes excluded).
sns.histplot(data=handcuffed[(handcuffed['Reason'] != 99) & (handcuffed['Reason'] != 999)], y="Reason", hue="Reason", palette="pastel")
plt.title("Reason for Stop in Handcuffings")
plt.ylabel("Reason")
plt.xlabel("Number of Stops")
plt.show()
• The most common Reason for Stop that leads to a handcuffing is "Reasonable Suspicion"
Distribution for the Reason of Stop in Handcuffs. For Race, by Sex
# Faceted age plots for the handcuffed subset.
# NOTE(review): the Age <= 100 filter here is redundant — the plot helper
# applies the same filter internally.
for race in unique_races:
    plot_distribution_by_race(handcuffed[(handcuffed['Age'] <= 100)], race)
Not Handcuffed
# Same plots for stops without handcuffing, for comparison.
not_handcuffed = oakland_pd[oakland_pd["Handcuffed"] == "No"]
for race in unique_races:
    plot_distribution_by_race(not_handcuffed[(not_handcuffed['Age'] <= 100)], race)
Note that the plotter function displays counts and not density as the stat
Defining when were Drugs Found
# Subset of stops where drugs were found.
drugs_found = oakland_pd[oakland_pd["Drugs"] == "Yes"]
Counts By Race and Sex
# Count of drug-found stops per race, faceted by gender.
# NOTE(review): plt.title applies only to the last facet axes, not the figure.
sns.displot(data=drugs_found, y="Race", palette="pastel", hue="Race", hue_order=unique_races, col="Gender", col_order=["Female","Male"])
plt.title(f"Distribution of Drugs found by Race and Sex", y=1.05)
plt.show()
Gender Percentages Across Race
def percentage_of_drug_stops_by_race_and_gender(dataframe):
    """Share of all drug-found stops for each Race/Gender combination.

    Rows with the unreported gender code 99 are excluded; percentages are
    relative to the total number of "Yes" drug findings among the rest.
    """
    known_gender = dataframe[dataframe["Gender"] != 99]
    n_drug_stops = known_gender["Drugs"].value_counts()["Yes"]
    yes_per_group = known_gender.groupby(["Race", "Gender"])["Drugs"].apply(
        lambda col: (col == "Yes").sum())
    shares = yes_per_group / n_drug_stops * 100
    return shares.reset_index(name='Percentage in Drug Stops')
# Pivot the drug-stop shares into a Race x Gender grid; unobserved
# combinations display as 0.
drug_stops_by_race_gender_percentage_df = percentage_of_drug_stops_by_race_and_gender(oakland_pd)
table_drugs = pd.pivot_table(drug_stops_by_race_gender_percentage_df, index='Race',
                             columns='Gender', values='Percentage in Drug Stops')
table_drugs.fillna(0)
| Gender | Female | Male | Nonconforming | T Female | T Male |
|---|---|---|---|---|---|
| Race | |||||
| Asian | 0.335570 | 2.684564 | 0.0 | 0.0 | 0.0 |
| Black | 7.885906 | 52.348993 | 0.0 | 0.0 | 0.0 |
| Hispanic/Latino | 2.013423 | 22.986577 | 0.0 | 0.0 | 0.0 |
| Middle Eastern or South Asian | 0.000000 | 1.006711 | 0.0 | 0.0 | 0.0 |
| Multiracial | 0.503356 | 0.167785 | 0.0 | 0.0 | 0.0 |
| Native American | 0.000000 | 0.167785 | 0.0 | 0.0 | 0.0 |
| Pacific Islander | 0.000000 | 0.000000 | 0.0 | 0.0 | 0.0 |
| White | 2.013423 | 7.885906 | 0.0 | 0.0 | 0.0 |
# Overall percentage of stops in which drugs were found (~2.44%).
oakland_pd["Drugs"].value_counts()["Yes"] / len(oakland_pd) * 100
2.4431235908997744
Each cell represents the percentages of a Race/Gender combination for all Stops where Drugs were found within the same Race
The displayed amount represents the relative proportion within each Race, by Gender
The proportions are relatively low for some gender identities, suggesting that certain gender identities may be underrepresented and that there might be imbalances in the data collection process during police stops
Still, we can observe the following:
Black Males represent approximately 14.93% of all police stops.
By Reason of Stop, Race and Sex. Distributed by Age
# Faceted age plots per race for drug-found stops.
# NOTE(review): as the traceback below shows, this crashes for races with
# fewer than two observations in a KDE subgroup.
for race in unique_races:
    plot_distribution_by_race(drugs_found, race)
/var/folders/tj/g_7k1_0n1ql5y1bqbkbsz7x00000gn/T/ipykernel_20505/2764074444.py:4: UserWarning: Ignoring `palette` because no `hue` variable has been assigned. g = sns.displot(data=race_data, x="Age", hue="Reason", hue_order=reason_order, col="Gender",
--------------------------------------------------------------------------- ValueError Traceback (most recent call last) Cell In[149], line 2 1 for race in unique_races: ----> 2 plot_distribution_by_race(drugs_found, race) Cell In[136], line 4, in plot_distribution_by_race(dataframe, race) 1 def plot_distribution_by_race(dataframe, race): 2 race_data = dataframe[(dataframe["Race"] == race) & (dataframe['Age'] <= 100)] ----> 4 g = sns.displot(data=race_data, x="Age", hue="Reason", hue_order=reason_order, col="Gender", 5 col_order=["Female","Male"], kind="hist", bins=10, kde=True, 6 palette="pastel", height=5, aspect=1) 7 g.fig.suptitle(f"Age Distribution for race: {race}", y=1.05) 8 plt.xlabel("Age") File /Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/site-packages/seaborn/distributions.py:2210, in displot(data, x, y, hue, row, col, weights, kind, rug, rug_kws, log_scale, legend, palette, hue_order, hue_norm, color, col_wrap, row_order, col_order, height, aspect, facet_kws, **kwargs) 2207 if p.univariate: 2209 _assign_default_kwargs(hist_kws, p.plot_univariate_histogram, histplot) -> 2210 p.plot_univariate_histogram(**hist_kws) 2212 else: 2214 _assign_default_kwargs(hist_kws, p.plot_bivariate_histogram, histplot) File /Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/site-packages/seaborn/distributions.py:451, in _DistributionPlotter.plot_univariate_histogram(self, multiple, element, fill, common_norm, common_bins, shrink, kde, kde_kws, color, legend, line_kws, estimate_kws, **plot_kws) 449 kde_kws["cumulative"] = estimate_kws["cumulative"] 450 log_scale = self._log_scaled(self.data_variable) --> 451 densities = self._compute_univariate_density( 452 self.data_variable, 453 common_norm, 454 common_bins, 455 kde_kws, 456 log_scale, 457 warn_singular=False, 458 ) 460 # First pass through the data to compute the histograms 461 for sub_vars, sub_data in self.iter_data("hue", from_comp_data=True): 462 463 # Prepare the relevant data File 
/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/site-packages/seaborn/distributions.py:317, in _DistributionPlotter._compute_univariate_density(self, data_variable, common_norm, common_grid, estimate_kws, log_scale, warn_singular) 315 if common_grid: 316 all_observations = self.comp_data.dropna() --> 317 estimator.define_support(all_observations[data_variable]) 318 else: 319 common_norm = False File /Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/site-packages/seaborn/_statistics.py:127, in KDE.define_support(self, x1, x2, weights, cache) 125 """Create the evaluation grid for a given data set.""" 126 if x2 is None: --> 127 support = self._define_support_univariate(x1, weights) 128 else: 129 support = self._define_support_bivariate(x1, x2, weights) File /Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/site-packages/seaborn/_statistics.py:99, in KDE._define_support_univariate(self, x, weights) 97 def _define_support_univariate(self, x, weights): 98 """Create a 1D grid of evaluation points.""" ---> 99 kde = self._fit(x, weights) 100 bw = np.sqrt(kde.covariance.squeeze()) 101 grid = self._define_support_grid( 102 x, bw, self.cut, self.clip, self.gridsize 103 ) File /Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/site-packages/seaborn/_statistics.py:142, in KDE._fit(self, fit_data, weights) 139 if weights is not None: 140 fit_kws["weights"] = weights --> 142 kde = gaussian_kde(fit_data, **fit_kws) 143 kde.set_bandwidth(kde.factor * self.bw_adjust) 145 return kde File /Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/site-packages/scipy/stats/_kde.py:199, in gaussian_kde.__init__(self, dataset, bw_method, weights) 197 self.dataset = atleast_2d(asarray(dataset)) 198 if not self.dataset.size > 1: --> 199 raise ValueError("`dataset` input should have multiple elements.") 201 self.d, self.n = self.dataset.shape 203 if weights is not None: ValueError: `dataset` input should have multiple elements.
• Not enough Data for Middle Eastern, Pacific Islander and Native American
Handcuffs when Drugs were found
# Were drugs-found stops also handcuffings, broken down by race?
sns.countplot(data=drugs_found, y="Handcuffed", hue="Race", hue_order=unique_races, palette="pastel")
plt.title("Distribution of Drugs Found by Race")
plt.xlabel("Number of Stops")
plt.show()
def general_stats(dataframe):
    """Summarize stop, handcuff, and drug-discovery counts per race.

    Parameters
    ----------
    dataframe : pd.DataFrame
        Must contain "Race", "Handcuffed" ("Yes"/"No"), and "Drugs"
        ("Yes"/"No") columns.

    Returns
    -------
    pd.DataFrame
        One row per unique race (in order of first appearance) with raw
        counts plus percentage breakdowns: each race's share of all
        stops/handcuffs/drug finds, and within-race rates.
    """
    def _pct(part, whole):
        # Guard against a zero denominator (e.g. a dataframe with no
        # handcuffed stops); the original code would raise ZeroDivisionError.
        return (part / whole) * 100 if whole else 0.0

    total_stops = len(dataframe)
    total_handcuffed = len(dataframe[dataframe["Handcuffed"] == "Yes"])
    total_drugs_found = len(dataframe[dataframe["Drugs"] == "Yes"])

    records = []
    for race in dataframe["Race"].unique():
        race_data = dataframe[dataframe["Race"] == race]
        race_stops = len(race_data)
        handcuffed = len(race_data[race_data["Handcuffed"] == "Yes"])
        drugs_found = len(race_data[race_data["Drugs"] == "Yes"])
        records.append({
            "Race": race,
            "Number of Stops": race_stops,
            "Number of Handcuffs": handcuffed,
            "Instances of Drugs Found": drugs_found,
            # This race's share of ALL stops / ALL handcuffed stops.
            "Proportion Total Stops %": _pct(race_stops, total_stops),
            "Proportion Total Handcuffs %": _pct(handcuffed, total_handcuffed),
            # Within-race rates: per stop of this race.
            "Proportion Drugs Found %": _pct(drugs_found, race_stops),
            "Relative Prop. of Handcuffs": _pct(handcuffed, race_stops),
            # This race's share of all drug discoveries.
            "Relative Prop. of Drugs Found": _pct(drugs_found, total_drugs_found),
        })
    # Dict-key order fixes the column order, matching the original output.
    return pd.DataFrame(records)
general_stats(oakland_pd)
| Race | Number of Stops | Number of Handcuffs | Instances of Drugs Found | Proportion Total Stops % | Proportion Total Handcuffs % | Proportion Drugs Found % | Relative Prop. of Handcuffs | Relative Prop. of Drugs Found | |
|---|---|---|---|---|---|---|---|---|---|
| 0 | White | 3268 | 993 | 59 | 13.396188 | 10.328687 | 1.805386 | 30.385557 | 9.899329 |
| 1 | Hispanic/Latino | 6031 | 2391 | 149 | 24.722279 | 24.869981 | 2.470569 | 39.645167 | 25.000000 |
| 2 | Black | 12856 | 5635 | 359 | 52.699324 | 58.612440 | 2.792470 | 43.831674 | 60.234899 |
| 3 | Asian | 1173 | 303 | 18 | 4.808362 | 3.151654 | 1.534527 | 25.831202 | 3.020134 |
| 4 | Multiracial | 283 | 72 | 4 | 1.160074 | 0.748908 | 1.413428 | 25.441696 | 0.671141 |
| 5 | Middle Eastern or South Asian | 578 | 129 | 6 | 2.369338 | 1.341793 | 1.038062 | 22.318339 | 1.006711 |
| 6 | Pacific Islander | 177 | 79 | 0 | 0.725559 | 0.821718 | 0.000000 | 44.632768 | 0.000000 |
| 7 | Native American | 29 | 12 | 1 | 0.118877 | 0.124818 | 3.448276 | 41.379310 | 0.167785 |
What does this mean?
Black individuals constitute the racial group with the highest number of stops, handcuffs, and instances of drugs found. They have the highest proportion of total stops, handcuffs, and drug occurrences among all racial categories.
Hispanic/Latino individuals also have a significant number of stops, handcuffs, and instances of drugs found, with proportions relatively close to their total stops.
White individuals have a lower proportion of handcuffs and drug occurrences compared to their proportion of total stops.
The "Pacific Islander" and "Native American" groups have comparatively low numbers of stops and handcuffs, but the proportion of handcuffs applied to them is relatively high compared to their share of total stops.
Distribution of the time of the Stop by Race
# Histogram (with KDE overlay) of the hour of stop, one distribution per race.
hour_plot_kws = dict(x='Hour', hue='Race', kde=True, bins=24, palette='pastel', height=5, aspect=2)
sns.displot(data=oakland_pd, **hour_plot_kws)
plt.title("Distribution of Stop Hour by Race")
plt.xlabel("Hour of Stop")
plt.ylabel("Number of Stops")
plt.show()
By Race and Sex
# Hourly distribution again, faceted by gender; the rare
# "Nonconforming/ T Male/ T Female" category is excluded for readability.
binary_gender = by_sex[by_sex["Gender"] != "Nonconforming/ T Male/ T Female"]
sns.displot(data=binary_gender, x='Hour', hue='Race', col='Gender', kde=True, bins=24, palette='pastel', height=5, aspect=1)
plt.suptitle("Distribution of Stop Hour by Race and Gender", y=1.05)
plt.xlabel("Hour of Stop")
plt.ylabel("Number of Stops")
plt.show()
Distribution of Date of the Stop by Race
# Stops across the calendar year, overlaid per race (KDE smoothed).
day_plot_kws = dict(x='Day', hue='Race', kde=True, bins=36, palette='pastel', height=5, aspect=2)
sns.displot(data=oakland_pd, **day_plot_kws)
plt.title("Distribution of Stop Day by Race")
plt.xlabel("Date of Stop")
plt.ylabel("Number of Stops")
plt.show()
• Interesting spike in the middle. Let's see what it means.
Days with the most stops by Race
def day_most_stops_by_race(dataframe):
    """Print, for each race, the calendar day with the most stops.

    Expects a 'Race' column and a datetime-like 'Day' column (strftime
    is called on the winning day). Ties are broken by value_counts order.
    """
    for race in dataframe['Race'].unique():
        # Count stops per day for this race and pick the busiest one.
        daily_counts = dataframe.loc[dataframe['Race'] == race, 'Day'].value_counts()
        busiest_day = daily_counts.idxmax().strftime("%B %d")
        print(f"The day when race '{race}' had the most stops:", busiest_day)
day_most_stops_by_race(oakland_pd)
The day when race 'White' had the most stops: June 07 The day when race 'Hispanic/Latino' had the most stops: July 04 The day when race 'Black' had the most stops: July 04 The day when race 'Asian' had the most stops: July 22 The day when race 'Multiracial' had the most stops: July 22 The day when race 'Middle Eastern or South Asian' had the most stops: April 14 The day when race 'Pacific Islander' had the most stops: July 25 The day when race 'Native American' had the most stops: November 15
Feeling Patriotic?
• I would be interested in knowing more about July 4th
Stop duration by Race
oakland_pd["Duration"].describe()
count 24395.000000 mean 31.992949 std 42.382828 min 1.000000 25% 10.000000 50% 20.000000 75% 40.000000 max 999.000000 Name: Duration, dtype: float64
Limiting to 5 Hours (for better visualization)
# Keep stops of at most 5 hours (300 min); extreme durations squash the plot.
duration_df = by_sex[by_sex['Duration'] <= 300]
# Drop the rare "Nonconforming/ T Male/ T Female" category for readability.
known_gender = duration_df[duration_df["Gender"] != "Nonconforming/ T Male/ T Female"]
sns.displot(data=known_gender, x='Duration', hue='Race', col="Gender", bins=15, kde=True, palette='pastel')
plt.title("Distribution of Stop Duration by Race")
plt.xlabel("Duration of Stop (minutes)")
plt.ylabel("Number of Stops")
plt.tight_layout()
plt.show()
Percentages of Stop Duration by Race
# Bucket each stop's duration into 30-minute bins (0-300 min) and compute,
# per race, the percentage of that race's binned stops falling in each bin.
# Fix: the bins were originally cut from `duration_df` (a filtered view of
# `by_sex`) while the groupby ran on `oakland_pd`, silently relying on index
# alignment between two different frames. Cutting `oakland_pd`'s own Duration
# column removes that coupling; durations > 300 min fall outside the bin
# edges and are excluded from the table either way.
duration_bins = pd.cut(oakland_pd['Duration'], bins=[0, 30, 60, 90, 120, 150, 180, 210, 240, 270, 300])
percentage_by_race = oakland_pd.groupby(['Race', duration_bins]).size().unstack()
# Normalize each race's row to percentages of its own binned stops.
percentage_by_race = (percentage_by_race.div(percentage_by_race.sum(axis=1), axis=0) * 100).round(2)
percentage_by_race
| Duration | (0, 30] | (30, 60] | (60, 90] | (90, 120] | (120, 150] | (150, 180] | (180, 210] | (210, 240] | (240, 270] | (270, 300] |
|---|---|---|---|---|---|---|---|---|---|---|
| Race | ||||||||||
| Asian | 81.30 | 13.49 | 2.22 | 1.62 | 0.09 | 1.02 | 0.00 | 0.17 | 0.00 | 0.09 |
| Black | 72.10 | 20.11 | 3.03 | 3.05 | 0.26 | 0.80 | 0.09 | 0.34 | 0.02 | 0.22 |
| Hispanic/Latino | 73.98 | 18.14 | 3.46 | 2.77 | 0.28 | 0.60 | 0.13 | 0.45 | 0.02 | 0.17 |
| Middle Eastern or South Asian | 79.41 | 15.22 | 1.04 | 2.42 | 0.00 | 1.04 | 0.35 | 0.35 | 0.17 | 0.00 |
| Multiracial | 87.63 | 9.89 | 1.06 | 1.06 | 0.00 | 0.35 | 0.00 | 0.00 | 0.00 | 0.00 |
| Native American | 68.97 | 20.69 | 6.90 | 3.45 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 |
| Pacific Islander | 71.19 | 14.69 | 5.08 | 8.47 | 0.00 | 0.56 | 0.00 | 0.00 | 0.00 | 0.00 |
| White | 76.87 | 17.57 | 2.95 | 1.60 | 0.31 | 0.37 | 0.09 | 0.18 | 0.00 | 0.06 |
• Running out of time, but an ANOVA test would be interesting follow-up work. Now let's write the report.